1 Introduction

The objective of this notebook is to refine the clustering annotation done at level 3. This refinement is the result of a manual curation carried out by specialists to remove poor quality cells, misclassified cells or clusters with very few cells.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(Signac)
library(tidyverse)
library(reshape2)
library(ggpubr)
library(harmony)

2.2 Parameters

cell_type = "Cytotoxic"

# Paths
path_to_CD4_T_obj <- str_c(
  here::here("scATAC-seq/results/R_objects/level_3/"),
  "CD4_T",
  "/",
  "CD4_T",
  "_integrated_level_3.rds",
  sep = ""
)

path_to_obj <- str_c(
  here::here("scATAC-seq/results/R_objects/level_3/"),
  cell_type,
  "/",
  cell_type,
  "_integrated_level_3.rds",
  sep = ""
)

path_to_obj_RNA <- str_c(
  here::here("scRNA-seq/3-clustering/5-level_5/"),
  cell_type,
    "/CD8_T_level_5_annotated_level_5.rds")

# Functions
source(here::here("scRNA-seq/bin/utils.R"))


# Colors
color_palette <-  c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D", 
                    "#5A5156", "#AA0DFE", "#F8A19F", "#F7E1A0", 
                    "#1C8356", "#FEAF16", "#822E1C", "#C4451C", 
                    "#1CBE4F", "#325A9B", "#F6222E", "#FE00FA", 
                    "#FBE426", "#16FF32", "black", "#3283FE",
                    "#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3",
                    "#90AD1C", "#FE00FA", "#85660D", "#3B00FB",
                    "#822E1C", "coral2", "#1CFFCE", "#1CBE4F",
                    "#3283FE", "#FBE426", "#F7E1A0", "#325A9B",   
                    "#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD",
                    "#FEAF16", "#683B79", "#B10DA1", "#1C7F93", 
                    "#F8A19F", "dark orange", "#FEAF16", "#FBE426",  
                    "Brown")


path_to_save <- str_c(
  here::here("scATAC-seq/results/R_objects/level_4/"),
  cell_type,
  "/01.",
  cell_type,
  "_integrated_level_4.rds",
  sep = ""
)

3 Load data

3.1 Load CD4-T data

We are going to add a specific cluster from a previous CD4-T object that corresponds to naïve CD8-T cells.

seurat_CD4 <- readRDS(path_to_CD4_T_obj)

DimPlot(seurat_CD4,
  pt.size = 0.1) 

seurat_RNA <- readRDS(path_to_obj_RNA)

DimPlot(seurat_RNA,
  pt.size = 0.1) 

tonsil_RNA_annotation <- seurat_RNA@meta.data %>%
  rownames_to_column(var = "cell_barcode") %>%
  dplyr::filter(assay == "multiome") %>%
  dplyr::select("cell_barcode", "annotation_paper")

Naive_multiome_barcode <- tonsil_RNA_annotation[tonsil_RNA_annotation$annotation_paper == "Naive CD8 T",]$cell_barcode
doublets_multiome_barcode <- tonsil_RNA_annotation[tonsil_RNA_annotation$annotation_paper == "doublets",]$cell_barcode

3.1.1 Sub-clustering CD4-T cells

We project the barcodes from multiome naive CD8-T onto the CD4-T object and verify the cluster that correspond to this group. We will extract and integrate it into the final CD8-T object.

DimPlot(
  seurat_CD4, 
  reduction = "umap",
  cols.highlight = "darkred", 
  cols= "grey",
  cells.highlight= Naive_multiome_barcode,
  pt.size = 0.1
)

seurat_CD4 <- FindClusters(seurat_CD4, resolution = 0.05)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 21819
## Number of edges: 769264
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9541
## Number of communities: 5
## Elapsed time: 3 seconds
DimPlot(
  seurat_CD4, 
  reduction = "umap",
  pt.size = 0.1
)

selected_cells <- row.names(seurat_CD4@meta.data[seurat_CD4@meta.data$seurat_clusters == 1,])

seurat_CD8_naive <- subset(seurat_CD4, 
                           cells = unique(selected_cells))

DimPlot(seurat_CD8_naive,
  pt.size = 0.1) 

4 Load Cytotoxic data

seurat_Cytotoxic <- readRDS(path_to_obj)
seurat_Cytotoxic
## An object of class Seurat 
## 270784 features across 3395 samples within 1 assay 
## Active assay: peaks_macs (270784 features, 107717 variable features)
##  3 dimensional reductions calculated: umap, lsi, harmony
DimPlot(seurat_Cytotoxic,
  pt.size = 0.1) 

5 Merging naive CD8-T with CD8-T main object

seurat <- merge(
  x = seurat_Cytotoxic,
  y = seurat_CD8_naive)

seurat
## An object of class Seurat 
## 270784 features across 4870 samples within 1 assay 
## Active assay: peaks_macs (270784 features, 0 variable features)

6 Integration

seurat <- seurat %>%
  RunTFIDF() %>%
  FindTopFeatures(min.cutoff = 10) %>%
  RunSVD()

DepthCor(seurat)

seurat <- RunUMAP(object = seurat, reduction = 'lsi', dims = 2:40)

DimPlot(seurat,
        group.by = "assay")

seurat <- RunHarmony(
  object = seurat,
  dims = 2:40,
  group.by.vars = 'assay',
  reduction = 'lsi',
  assay.use = 'peaks_macs',
  project.dim = FALSE,
  max.iter.harmony = 20
)

seurat <- RunUMAP(seurat, reduction = "harmony", dims = 2:20)
seurat <- FindNeighbors(seurat, reduction = "harmony", dims = 2:20)

DimPlot(seurat, 
        group.by = "assay",
        pt.size = 0.2)

7 Checking low quality cells

tonsil_ATAC_cell_barcode <- seurat@meta.data %>%
  rownames_to_column(var = "cell_barcode") %>%
  dplyr::filter(assay == "multiome") %>%
  dplyr::select("cell_barcode")

possible_doublets_ATAC <-setdiff(tonsil_ATAC_cell_barcode$cell_barcode,
                                 tonsil_RNA_annotation$cell_barcode)

seurat$quality_cells <- ifelse(colnames(seurat) %in% possible_doublets_ATAC, 
                               "Poor-quality", "Good-quality")

7.1 Visualize the projection of the problematic cells onto the scATAC-seq UMAP

DimPlot(
  seurat, group.by = "quality_cells")

7.2 Doublet cluster defined at scRNAseq level.

DimPlot(
  seurat, 
  reduction = "umap",
  cols.highlight = "darkred", 
  cols= "grey",
  cells.highlight= doublets_multiome_barcode,
  pt.size = 0.1
)

7.3 Scrublet prediction

DimPlot(seurat, group.by = "scrublet_predicted_doublet_atac")

table(seurat$scrublet_predicted_doublet_atac)
## 
## FALSE  TRUE 
##  4579   291

7.4 QC metrics

qc_vars <- c(
  "nCount_peaks",
  "nFeature_peaks",
  "nucleosome_signal",
  "TSS.enrichment"
)
qc_gg <- purrr::map(qc_vars, function(x) {
  p <- FeaturePlot(seurat, features = x, max.cutoff = "q95")
  p
})
qc_gg
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

7.5 Chromatin Signature

qc_vars <- c("NBC.MBC", "GCBC", "PC", "CD4.T", "Cytotoxic")
qc_gg <- purrr::map(qc_vars, function(x) {
  p <- FeaturePlot(seurat, feature = x, 
                   max.cutoff = 4, min.cutoff = -4) + 
    scale_color_viridis_c(option = "magma")
  p
})
qc_gg
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

8 Exclude problematic clusters and poor quality cells.

seurat <- FindClusters(seurat, resolution = 0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4870
## Number of edges: 177379
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9182
## Number of communities: 9
## Elapsed time: 0 seconds
DimPlot(seurat, 
        label = T,
        pt.size = 0.4)

selected_cells <- colnames(seurat)[!(seurat$peaks_macs_snn_res.0.2 %in% c("3","6","7"))]
selected_cells <- setdiff(selected_cells,c(doublets_multiome_barcode))

seurat_clean <- subset(seurat, 
                 cells = selected_cells, 
                 quality_cells == "Good-quality")

DimPlot(seurat_clean, 
        pt.size = 0.4)

9 Integration

seurat_clean <- seurat_clean %>%
  RunTFIDF() %>%
  FindTopFeatures(min.cutoff = 10) %>%
  RunSVD()

DepthCor(seurat_clean)

seurat_clean <- RunUMAP(object = seurat_clean, reduction = 'lsi', dims = 2:40)

DimPlot(seurat_clean,
        group.by = "assay")

seurat_clean <- RunHarmony(
  object = seurat_clean,
  dims = 2:40,
  group.by.vars = 'assay',
  reduction = 'lsi',
  assay.use = 'peaks_macs',
  project.dim = FALSE,
  max.iter.harmony = 20
)

seurat_clean <- RunUMAP(seurat_clean, reduction = "harmony", dims = 2:14)

DimPlot(seurat_clean, 
        group.by = "assay",
        pt.size = 0.5)

9.1 UMAP level 1

umap_level_1 <- FeatureScatter(
  seurat,
  "UMAP_1_level_1",
  "UMAP_2_level_1",
  group.by = "annotation_level_1"
)
umap_level_1 <- umap_level_1 +
  theme(
    #legend.position = "none",
    plot.title = element_blank()
  )
umap_level_1

10 Save

saveRDS(seurat_clean, path_to_save)

11 Session Information

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Motif_TF/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] harmony_1.0        Rcpp_1.0.6         ggpubr_0.4.0       reshape2_1.4.4     forcats_0.5.0      stringr_1.4.0      dplyr_1.0.7        purrr_0.3.4        readr_1.4.0        tidyr_1.1.3        tibble_3.1.2       ggplot2_3.3.5      tidyverse_1.3.0    Signac_1.2.1       SeuratObject_4.0.2 Seurat_4.0.3       BiocStyle_2.16.1  
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.1             reticulate_1.20        tidyselect_1.1.1       htmlwidgets_1.5.3      grid_4.0.3             docopt_0.7.1           BiocParallel_1.22.0    Rtsne_0.15             munsell_0.5.0          codetools_0.2-17       ica_1.0-2              future_1.21.0          miniUI_0.1.1.1         withr_2.4.2            colorspace_2.0-2       knitr_1.30             rstudioapi_0.11        stats4_4.0.3           ROCR_1.0-11            ggsignif_0.6.0         tensor_1.5             listenv_0.8.0          labeling_0.4.2         slam_0.1-47            GenomeInfoDbData_1.2.3 polyclip_1.10-0        farver_2.1.0           rprojroot_2.0.2        parallelly_1.26.1      vctrs_0.3.8            generics_0.1.0         xfun_0.18              lsa_0.73.2             ggseqlogo_0.1          R6_2.5.0               GenomeInfoDb_1.24.2    bitops_1.0-7           spatstat.utils_2.2-0   assertthat_0.2.1       promises_1.2.0.1       scales_1.1.1           gtable_0.3.0           globals_0.14.0         goftest_1.2-2          rlang_0.4.11           RcppRoll_0.3.0         splines_4.0.3          rstatix_0.6.0          lazyeval_0.2.2         spatstat.geom_2.2-0    broom_0.7.2           
##  [52] BiocManager_1.30.10    yaml_2.2.1             abind_1.4-5            modelr_0.1.8           backports_1.1.10       httpuv_1.6.1           tools_4.0.3            bookdown_0.21          ellipsis_0.3.2         spatstat.core_2.2-0    RColorBrewer_1.1-2     BiocGenerics_0.34.0    ggridges_0.5.3         plyr_1.8.6             zlibbioc_1.34.0        RCurl_1.98-1.2         rpart_4.1-15           deldir_0.2-10          pbapply_1.4-3          cowplot_1.1.1          S4Vectors_0.26.0       zoo_1.8-9              haven_2.3.1            ggrepel_0.9.1          cluster_2.1.0          fs_1.5.0               here_1.0.1             magrittr_2.0.1         RSpectra_0.16-0        data.table_1.14.0      scattermore_0.7        openxlsx_4.2.4         lmtest_0.9-38          reprex_0.3.0           RANN_2.6.1             SnowballC_0.7.0        fitdistrplus_1.1-5     matrixStats_0.59.0     hms_0.5.3              patchwork_1.1.1        mime_0.11              evaluate_0.14          xtable_1.8-4           rio_0.5.16             sparsesvd_0.2          readxl_1.3.1           IRanges_2.22.1         gridExtra_2.3          compiler_4.0.3         KernSmooth_2.23-17     crayon_1.4.1          
## [103] htmltools_0.5.1.1      mgcv_1.8-33            later_1.2.0            lubridate_1.7.9        DBI_1.1.0              tweenr_1.0.1           dbplyr_1.4.4           MASS_7.3-53            Matrix_1.3-4           car_3.0-10             cli_3.0.0              parallel_4.0.3         igraph_1.2.6           GenomicRanges_1.40.0   pkgconfig_2.0.3        foreign_0.8-80         plotly_4.9.4.1         spatstat.sparse_2.0-0  xml2_1.3.2             XVector_0.28.0         rvest_0.3.6            digest_0.6.27          sctransform_0.3.2      RcppAnnoy_0.0.18       spatstat.data_2.1-0    Biostrings_2.56.0      rmarkdown_2.5          cellranger_1.1.0       leiden_0.3.8           fastmatch_1.1-0        uwot_0.1.10            curl_4.3.2             shiny_1.6.0            Rsamtools_2.4.0        lifecycle_1.0.0        nlme_3.1-150           jsonlite_1.7.2         carData_3.0-4          viridisLite_0.4.0      fansi_0.5.0            pillar_1.6.1           lattice_0.20-41        fastmap_1.1.0          httr_1.4.2             survival_3.2-7         glue_1.4.2             qlcMatrix_0.9.7        zip_2.1.1              png_0.1-7              ggforce_0.3.2          stringi_1.6.2         
## [154] blob_1.2.1             irlba_2.3.3            future.apply_1.7.0